Main Figures
Figure 1. Mutant KRAS signaling activates an intrinsic ISG signature.
toil data load and transform
toil.survival <- read_tsv(file.path(output.data.dir, 'tcga.data/TCGA-LUAD.survival.tsv.gz'))
luad.phenotypes <-
read_tsv(file.path(output.data.dir, 'tcga.data/TCGA-LUAD.GDC_phenotype.tsv.gz')) %>%
select(sample = submitter_id.samples,
sample_type = sample_type.samples,
stage = tumor_stage.diagnoses,
age = age_at_index.demographic,
sex = gender.demographic,
race = race.demographic)
luad.mutect <- read_tsv(file.path(output.data.dir, 'tcga.data/TCGA-LUAD.mutect2_snv.tsv.gz')) %>%
filter(gene %in% c('KRAS')) %>%
select(sample = Sample_ID, gene, aa_change = Amino_Acid_Change) %>%
pivot_wider(names_from = gene, values_from = aa_change) %>%
mutate(across(.cols = where(is.list), paste))
luad.phenotypes.merge <- luad.phenotypes %>%
merge(luad.mutect, by = 'sample', all.x = T) %>%
filter(sample_type %in% c('Primary Tumor', 'Solid Tissue Normal')) %>%
merge(toil.survival, by = 'sample') %>%
mutate(sample = str_sub(sample, end = str_length(sample) - 1)) %>%
replace_na(list(KRAS = 'WT')) %>%
mutate(KRAS = ifelse(sample_type == 'Solid Tissue Normal', 'WT_normal', KRAS))
norm.counts.final <- read_tsv(file.path(output.data.dir, 'tcga.data', 'luad.data.frame.tsv'))
norm.counts.final %>%
filter(sample != 'TCGA-44-3917-01') %>%
filter(KRAS %in% c('p.G12D', 'WT_normal')) -> working_toil_counts
complex plotting
plot_complex_heatmap <- function(filter_values, show_genes = F, split_vector = NULL, split_title = NULL) {
set.seed(123)
## heatmap count matrix
working_toil_counts %>%
filter(gene %in% filter_values) %>%
group_by(gene) %>%
mutate(count = scale(log1p(count), center = T)) %>%
select(sample, gene, count) %>%
pivot_wider(names_from = gene, values_from = count) %>%
distinct() %>%
column_to_rownames('sample') %>%
t() -> toil_hm_counts
## heatmap mutation annotation
working_toil_counts %>%
select(sample, 'G12D' = KRAS) %>%
distinct() %>%
column_to_rownames('sample') %>%
ComplexHeatmap::columnAnnotation(df = . ,
col = list(G12D = c('WT_normal' = 'white',
'p.G12D' = viridis(3)[2])),
show_legend = F,
annotation_name_side = 'left',
annotation_name_gp = grid::gpar(fontsize = 8),
simple_anno_size = unit(2.5, "mm")) -> toil_sample_anno
## heatmap DE barpot annotation
working_toil_counts %>%
filter(gene %in% filter_values) %>%
select(gene) %>%
distinct() %>%
merge(salmon.quant$aale$intra$deseq$de %>% select(gene, log2FoldChange), by = 'gene') %>%
column_to_rownames('gene') -> toil_de
toil_de[order(match(rownames(toil_de), rownames(toil_hm_counts))), , drop =F] -> toil_de
toil_de_list <- setNames(toil_de$log2FoldChange, rownames(toil_de))
ComplexHeatmap::rowAnnotation(
`AALE RNA log2FC` = ComplexHeatmap::anno_barplot(x = toil_de_list,
bar_width = 1,
which = 'row',
gp = grid::gpar(col = "black", fill = viridis(3)[2]),
border = FALSE,
annotation_name_gp = grid::gpar(fontsize = 8),
width = unit(1.25, "cm"),
height = unit(0.75, "cm")),
show_annotation_name = TRUE,
annotation_name_gp = grid::gpar(fontsize = 8)) -> log2fc_ha
working_toil_counts %>%
filter(gene %in% filter_values) %>%
select(gene) %>%
distinct() %>%
mutate(IRF9 = ifelse(gene %in% irf9_fimo_results$gene, T, F),
IRF1 = ifelse(gene %in% irf1_fimo_results$gene, T, F),
IRF7 = ifelse(gene %in% irf7_fimo_results$gene, T, F)) %>%
column_to_rownames('gene') %>%
ComplexHeatmap::rowAnnotation(df =.,
col = list(IRF9 = c('TRUE' = viridis(2)[1], 'FALSE' = 'white'),
IRF1 = c('TRUE' = viridis(2)[1], 'FALSE' = 'white'),
IRF7 = c('TRUE' = viridis(2)[1], 'FALSE' = 'white')),
show_legend = F,
annotation_name_gp = grid::gpar(fontsize = 8),
simple_anno_size = unit(2, "mm")) -> gene_motif_anno
ComplexHeatmap::Heatmap(toil_hm_counts,
name = 'TCGA\nz-score',
show_row_names = show_genes,
show_column_names = F,
row_split = split_vector,
row_title = split_title,
row_title_gp = grid::gpar(fontsize = 7, face = 'bold'),
row_names_side = 'right',
row_dend_width = unit(3, 'mm'),
left_annotation = gene_motif_anno,
right_annotation = log2fc_ha,
top_annotation = toil_sample_anno,
heatmap_legend_param =
list(labels_gp = grid::gpar(fontsize = 7),
legend_gp = grid::gpar(fontsize = 7),
legend_direction = 'vertical',
legend_height = unit(3.5, "cm"),
title_gp = grid::gpar(fontsize = 7, face = 'bold')),
row_names_gp = grid::gpar(fontsize = 7),
heatmap_width = patch_fig_width*0.875)
}
consensus_atac_boolean_anno %>%
filter(ctrl_R1.mLb.clN.bool == F) %>%
filter(`Gene Name` %in% filter(salmon.quant$aale$intra$deseq$de, padj < 0.05, log2FoldChange > 1.5)$gene) %>%
# filter(`Gene Name` %in% c(ifn_g, ifn_a)) %>%
separate(Annotation, sep = '\\(', into = c('annotation', NA)) %>%
filter(annotation == "promoter-TSS ") %>%
# group_by(annotation) %>%
group_by(`Gene Name`) %>%
tally() -> aale_uniq_up
salmon.quant$aale$intra$deseq$de %>%
filter(padj < 0.05, log2FoldChange > 1.5, gene %in% c(ifn_a, ifn_g)) -> aale_ifn
unique_peak_aale_ifn_list <- unique(c(aale_uniq_up$`Gene Name`, aale_ifn$gene))
aale_ifn_chm <- plot_complex_heatmap(unique_peak_aale_ifn_list, show_genes = T,
split_vector = 2, split_title = "cluster_%s")
aale_ifn_chm_no_rownames <- aale_ifn_chm
aale_ifn_chm_no_rownames@row_names_param$show <- FALSE
split_rowOrder <- ComplexHeatmap::row_order(ComplexHeatmap::draw(aale_ifn_chm_no_rownames))
aale_ifn_chm <- plot_complex_heatmap(unique_peak_aale_ifn_list, show_genes = T)
aale_ifn_chm_rowOrder <- ComplexHeatmap::row_order(ComplexHeatmap::draw(aale_ifn_chm))
ifn_high_hm_zoom <- aale_ifn_chm[split_rowOrder[[1]], ]
patchwork
msig.df[grepl('INTERFERON', names(msig.df))] %>%
unlist() %>% unname() -> ifn_gene_list
salmon.quant$aale$intra$deseq$de %>%
mutate(padj = ifelse(padj == 0,
min(filter(salmon.quant$aale$intra$deseq$de, padj != 0)$padj), padj)) %>%
mutate(gene_set = ifelse(gene %in% ifn_gene_list,'IFN', 'other'),
gene_set = ifelse(gene %in% msig.df$HALLMARK_KRAS_SIGNALING_UP | gene == 'KRAS', 'KRAS', gene_set),
gene_set = ifelse(gene %in% meta.data$trono.krab.znfs$gene, 'ZNF', gene_set)) %>%
mutate(gene_set = factor(gene_set, levels = c('IFN','KRAS','ZNF', 'other'))) %>%
filter(padj < 0.05, gene_set != 'other') %>%
mutate(kras = ifelse(gene == 'KRAS', T, F)) %>%
ggplot(aes(log2FoldChange, -log10(padj), label = gene, color = kras)) +
geom_point(alpha = 0.6) +
ggrepel::geom_text_repel(aes(label = ifelse(abs(log2FoldChange) > 5 & gene_set != 'other' |
padj < 0.0001 & gene_set != 'other' | gene == "KRAS", gene, '')),
size = txt.mm_to_pts(0.8), segment.color = NA) +
facet_col(~gene_set, scales = 'free_y') +
xlab('RNA log2FoldChange') +
scale_color_manual(values = c('black', 'red'), guide = 'none') +
geom_vline(xintercept = 0, alpha = 0.3, linetype = 'dashed') -> plt_1A.plt
salmon.quant$aale$intra$deseq$H.fgsea %>%
arrange(NES) %>%
mutate(rank = row_number()) %>%
group_by(pathway) %>%
mutate(pathway = str_replace_all(pathway, 'HALLMARK ', ''),
pathway = str_replace_all(pathway, ' ', '\n'),
observed = length(unlist(leadingEdge)),
NES = round(NES, 3)) %>%
# arrange(-NES, padj) %>%
filter(!grepl('ZNF', pathway)) %>%
ggplot(aes(NES, rank, color = -log10(padj))) +
geom_point(size = 4) +
geom_segment(aes(x=0, xend=NES, y=rank, yend=rank)) +
geom_text(aes(label = paste0(observed, '/', size),
x = NES - 0.75 * sign(NES)),
nudge_y = 0.15,
color = 'black',
size = txt.mm_to_pts(0.8)) +
geom_text(aes(label = pathway,
x = - .9 * sign(NES)),
nudge_y = 0.05,
color = 'black',
size = txt.mm_to_pts(0.8)) +
scale_color_viridis_c() +
ylab('gene set') + xlab('GSEA NES') +
geom_vline(xintercept = 0, alpha = 0.3, linetype = 'dashed') +
theme(legend.position = c(0.99, 0.17),
axis.ticks.y = element_blank(),
axis.text.y = element_blank()) -> plt_1B.plt
salmon.quant$aale$intra$deseq$de %>%
dplyr::rename('AALE' = log2FoldChange) %>%
filter(padj < 0.05,
gene %in% c(ifn_a, ifn_g)) %>%
merge(salmon.quant$hbec$lacz_v_v12$deseq$de %>%
dplyr::rename('HBEC' = log2FoldChange) %>%
filter(padj < 0.05) %>%
mutate(HBEC = HBEC), by = 'gene') %>%
ggplot(aes(AALE, HBEC)) +
geom_point() +
xlab('AALE ISG RNA log2FC') + ylab('HBEC ISG RNA log2FC') +
geom_hline(yintercept = 0, linetype = 'dashed', alpha = 0.4) +
geom_vline(xintercept = 0, linetype = 'dashed', alpha = 0.4) +
geom_text_repel(aes(label = ifelse(AALE > 1 & HBEC > 1, gene, '')),
size = txt.mm_to_pts(0.8),show.legend = F) +
theme(legend.position = c(0.99, 0.22)) -> plt_1C.plt
salmon.quant$aale$intra$deseq$de %>%
merge(ame_tf, by.x = 'gene', by.y = 'TF') %>%
ggplot(aes(log2FoldChange, -log10(padj), label = gene)) +
geom_point() +
geom_text_repel(color = 'black', size = txt.mm_to_pts(0.8)) +
geom_vline(xintercept = 0, alpha = 0.3, linetype = 'dashed') +
xlab('ISG-binding TF RNA log2FC') +
theme(legend.position = c(0.35, 0.79)) -> plt_1D.plt
fig_1_patch <- "
AB
AB
CD
EE
"
wrap_plots(plt_1A.plt,
plt_1B.plt,
plt_1C.plt,
plt_1D.plt,
patchwork::wrap_elements(full =
grid::grid.grabExpr(ComplexHeatmap::draw(ifn_high_hm_zoom,
heatmap_legend_side = 'right'),
width = patch_fig_width)),
heights = c(1,1,0.75,1.25),
design = fig_1_patch) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = rel(0.95), face = 'bold'),
plot.tag.position = c(0.01, 0.99)) -> figure_1_patchwork.fig
ggsave2(plot = figure_1_patchwork.fig,
filename = file.path(figure.dir, 'fig.1', 'figure_1_patchwork.pdf'),
width = patch_fig_width, height = patch_fig_height, device = cairo_pdf, units = 'mm')
figure_1_patchwork.fig
Figure 2. Mutant KRAS signaling mediates epigenomic reprogramming of ISGs.
rbind(kras_de_prom_df, ctrl_de_prom_df) %>%
mutate(condition = toupper(condition)) %>%
ggplot(aes(position, cov, group = condition, color = condition)) +
stat_summary(geom="ribbon", fun.data=mean_cl_boot, width=0.1, conf.int=0.95,
fill='grey', alpha = 0.1, size = rel(0.05), show.legend = F)+
stat_summary(geom="point", fun=mean, alpha = 1, size = rel(1), show.legend = T) +
scale_color_manual(values = c(viridis(3)[1], viridis(3)[2])) +
xlab('distance to TSS') +
ylab('mean CPM (95% CI)') +
xlim(-1000, 500) +
theme(legend.position = c(0.3, 0.99)) -> plt_2A.plt
salmon.quant$aale$intra$deseq$de %>%
merge(uniq_kras_peak_genes, by.x = 'gene', by.y = 'Gene Name') %>%
mutate(`unique peaks` = ifelse(gene %in% c(ifn_a, ifn_g), 'ISG', 'other')) %>%
ggplot(aes(log2FoldChange, -log10(padj), label = gene, color = `unique peaks`)) +
geom_point() +
scale_color_manual(values = c('black', 'grey'), name = 'unique ATAC peak') +
geom_text_repel(aes(label = ifelse(`unique peaks` == 'ISG' | log2FoldChange > 6, gene, '')),
size = txt.mm_to_pts(0.8), show.legend = F, segment.color = NA) +
geom_vline(xintercept = 0, alpha = 0.3, linetype = 'dashed') +
xlab('RNA log2FoldChange')+
theme(legend.position = c(0.40, 1)) -> plt_2B.plt
base::do.call(rbind, uniq_kras_great_tb) %>%
group_by(Ontology) %>%
top_n(30, -HyperFdrQ) %>%
filter(HyperFdrQ < 0.05,
NumFgGenesHit > 1,
grepl('interferon|virus|RNA|oligo|^ex', Desc),
!grepl('part|poly', Desc)) %>%
ggplot(aes(log2(RegionFoldEnrich), reorder(Desc, RegionFoldEnrich), label = Desc, color = -log10(HyperFdrQ))) +
geom_point(aes(size = NumFgGenesHit)) +
geom_text_repel(aes(label = Desc),
size = txt.mm_to_pts(0.8), show.legend = F, point.padding = unit(2, 'mm'), color = 'black') +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(size = rel(0.7), face = 'plain'),
legend.position = c(0.99,0.35), legend.direction = 'horizontal') +
scale_x_continuous() +
ylab('GO Term') + xlab('ATAC region GREAT log2(FoldEnrichment)') +
scale_color_viridis_c() -> plt_2D.plt
fig_2_patch <- "
AB
CC
DD
"
wrap_plots(plt_2A.plt,
plt_2B.plt,
wrap_elements(full = ifn_atac_6.plt),
plt_2D.plt,
heights = c(0.75,1.5,0.75),
design = fig_2_patch) +
plot_annotation(tag_levels = 'A') -> figure_2_patchwork.fig
ggsave(plot = figure_2_patchwork.fig,
filename = file.path(figure.dir, 'fig.2', 'figure_2_patchwork.pdf'),
width = patch_fig_width, height = patch_fig_height, device = cairo_pdf, units = 'mm')
figure_2_patchwork.fig
Figure 3. Mutant KRAS signaling induces secretion of TE RNAs and ISGs in EVs.
kras.sight <- read_csv(file.path(output.data.dir, 'kras.nanosight.data.csv'), skip = 2,
col_names = c('size', 'r1', 'r2', 'r3', NA, NA, NA, NA)) %>%
mutate(condition = 'kras')
ctrl.sight <- read_csv(file.path(output.data.dir, 'ctrl.nanosight.data.csv'), skip = 2,
col_names = c('size', 'r1', 'r2', 'r3', NA, NA, NA, NA)) %>%
mutate(condition = 'ctrl')
nano_sight <- rbind(kras.sight, ctrl.sight)
nano_sight %>%
select(size, r1, r2, r3, condition) %>%
gather(rep, value, -condition, -size) %>%
filter(size <= 400) %>%
mutate(value = value/1e6,
condition = paste0(toupper(condition), ' EV')) %>%
group_by(size, condition) %>%
summarize(y = mean(value),
ymin = mean(value) - sd(value),
ymax = mean(value) + sd(value)) %>%
ggplot(aes(size, y = y, ymin = ymin, ymax = ymax, fill = condition)) +
geom_ribbon(alpha = 0.25) +
geom_line(color = 'darkgrey') +
scale_fill_manual(values = c(viridis(3)[1], viridis(3)[2]), guide = F) +
stat_peaks(geom = 'text', span = 35, vjust = -0.5, y.label.fmt = '%#f') +
facet_row(~condition) +
coord_cartesian(clip = 'off') +
ylab('1e6 particles per mL') + xlab('nm') -> plt_3A.plt
salmon.quant$aale$exo$deseq$de %>%
dplyr::rename('EXO_aale' = log2FoldChange) %>%
filter(padj < 0.05) %>%
merge(salmon.quant$aale$intra$deseq$de %>%
dplyr::rename('INTRA_aale' = log2FoldChange) %>%
filter(padj < 0.05), by = 'gene') %>%
lm(EXO_aale ~ INTRA_aale, data = .) %>%
summary() -> exo_lm_out
b <- round(exo_lm_out$coefficients[1,1], 2)
m <- round(exo_lm_out$coefficients[2,1], 2)
r2 <- round(exo_lm_out$adj.r.squared, 2)
salmon.quant$aale$exo$deseq$de %>%
dplyr::rename('EXO_aale' = log2FoldChange) %>%
filter(padj < 0.05) %>%
merge(salmon.quant$aale$intra$deseq$de %>%
dplyr::rename('INTRA_aale' = log2FoldChange) %>%
filter(padj < 0.05), by = 'gene') %>%
mutate(unique = ifelse(gene %in% uniq_kras_peak_genes$`Gene Name`, 'KRAS',
ifelse(gene %in% uniq_ctrl_peak_genes$`Gene Name`, 'CTRL', 'none'))) %>%
ggplot(aes(EXO_aale, INTRA_aale, color = unique)) +
geom_point() +
scale_color_manual(values = c(viridis(3)[1], viridis(3)[2], 'grey'), name = 'unique ATAC peak') +
geom_hline(yintercept = 0, linetype = 'dashed', alpha = 0.4) +
geom_vline(xintercept = 0, linetype = 'dashed', alpha = 0.4) +
geom_text_repel(aes(label = ifelse(abs(EXO_aale) > 1 & abs(INTRA_aale) > 1 | unique != 'none', gene, '')),
size = txt.mm_to_pts(0.8), show.legend = F, segment.color = NA) +
theme(legend.position = c(0.99, 0.23)) +
geom_line(stat = 'smooth', method = 'lm', linetype = 'dashed', alpha = 0.8, color = 'purple') +
annotate(geom = 'text', x = 2.35, y = -2, fontface = 'italic',
label = paste0('y', ' ', ' = ',b,' + ',m,'x; \nadj. R2 = ', r2),
size = rel(3.75), alpha = 0.8) +
ylab('intracellular RNA log2FC') + xlab('extracellular RNA log2FC') -> plt_3B.plt
salmon.quant$aale$exo$deseq$de %>%
mutate(biotype = ifelse(gene %in% meta.data$gencode.35.lnc.gene.names$gene, 'lncRNA', 'coding')) %>%
# filter(padj < 0.05) %>%
ggplot(aes(log2FoldChange, -log10(padj), label = gene, color = biotype)) +
geom_point(alpha = 0.6) +
scale_color_manual(values = c('black', 'grey')) +
ggrepel::geom_text_repel(aes(label = ifelse(abs(log2FoldChange) > 1, gene, '')), size = txt.mm_to_pts(0.8),
show.legend = F) +
geom_vline(xintercept = 0, alpha = 0.3, linetype = 'dashed') +
xlab('ex. RNA log2FoldChange') +
theme(legend.position = c(0.25,0.99)) -> plt_3C.plt
salmon.quant$aale$intra$deseq$de %>%
filter(padj < 0.05, abs(log2FoldChange) > 0.5) %>%
mutate(context = ifelse(log2FoldChange > 0, 'in_up', 'in_dn')) %>%
select(gene, context) %>%
rbind(
salmon.quant$aale$exo$deseq$de %>%
filter(padj < 0.05, abs(log2FoldChange) > 0.5) %>%
mutate(context = ifelse(log2FoldChange > 0, 'ex_up', 'ex_dn')) %>%
select(gene, context)
) %>%
group_by(gene) %>%
summarize(contexts = list(context)) %>%
ggplot(aes(contexts)) +
geom_bar() +
ggupset::scale_x_upset() +
geom_text(stat='count', aes(label=after_stat(count)), vjust = -0.05, size = txt.mm_to_pts(0.8)) +
xlab('') + ylab('overlapping DE genes') -> plt_3D.plt
salmon.quant$aale$exo$deseq$H.fgsea %>%
filter(pathway %in% salmon.quant$aale$intra$deseq$H.fgsea$pathway) %>%
mutate(context = 'ex.') %>%
rbind(salmon.quant$aale$intra$deseq$H.fgsea %>%
filter(pathway %in% salmon.quant$aale$exo$deseq$H.fgse$pathway) %>%
mutate(context = 'in.')) %>%
arrange(NES) %>%
mutate(rank = row_number()) %>%
group_by(pathway) %>%
mutate(pathway = str_replace_all(pathway, 'HALLMARK ', ''),
observed = length(unlist(leadingEdge)),
NES = round(NES, 3)) %>%
filter(!grepl('ZNF', pathway)) %>%
ggplot(aes(NES, context, color = -log10(padj))) +
geom_point(size = 4) +
geom_segment(aes(x=0, xend=NES, y=context, yend=context)) +
scale_color_viridis_c() +
ylab('context') + xlab('GSEA NES') +
ggforce::facet_col(~pathway, scales = 'free_y') +
geom_vline(xintercept = 0, alpha = 0.3, linetype = 'dashed') +
theme(legend.position = c(0.25, 0.99),
legend.direction = 'vertical') -> plt_3E.plt
ucsc.rmsk.salmon.quant$aale$exo$deseq$de %>%
filter(!gene %in% meta.data$gencode.35.tx.to.gene$gene,
padj < 0.05) %>%
merge(meta.data$te.families.clades, by = 'gene') %>%
ggplot(aes(log2FoldChange, -log10(padj), label = gene, color = clade)) +
geom_point(alpha = 0.6) +
# xlim(-2, 2) +
scale_color_viridis_d(direction = -1) +
ggrepel::geom_text_repel(aes(label = ifelse(abs(log2FoldChange) >= 1.15, gene, '')), show.legend = F) +
theme(legend.position = c(0.99,0.95)) +
xlab('ex. TE RNA log2FoldChange')-> plt_3F.plt
fig_3_patch <- "
AB
CD
EF
"
wrap_plots(plt_3A.plt,
plt_3C.plt,
plt_3B.plt,
wrap_elements(full = plt_3D.plt),
plt_3E.plt,
plt_3F.plt,
design = fig_3_patch,
heights = c(1, 1, 1)) +
plot_annotation(tag_levels = 'A') -> figure_3_patchwork.fig
ggsave(plot = figure_3_patchwork.fig,
filename = file.path(figure.dir, 'fig.3', 'figure_3_patchwork.pdf'),
width = patch_fig_width, height = patch_fig_height, device = cairo_pdf, units = 'mm')
figure_3_patchwork.fig
Figure 4. Mutant KRAS signaling epigenetically silences KZNFs in vitro.
rbind(znf_kras_de_prom_df, znf_ctrl_de_prom_df) %>%
ggplot(aes(position, cov, group = condition, color = condition)) +
stat_summary(geom="ribbon", fun.data=mean_cl_boot, width=0.1, conf.int=0.95,
fill='grey', alpha = 0.1, size = rel(0.05), show.legend = F)+
stat_summary(geom="point", fun=mean, alpha = 1, size = rel(1)) +
scale_color_manual(values = c(viridis(3)[1], viridis(3)[2])) +
xlab('distance to TSS') +
ylab('mean CPM (95% CI)') +
xlim(-1000, 500) +
theme(legend.position = c(0.30, 0.99)) -> plt_6A.plt
salmon.quant$aale$intra$deseq$de %>%
merge(uniq_ctrl_peak_genes, by.x = 'gene', by.y = 'Gene Name') %>%
mutate(`unique peaks` = ifelse(gene %in% meta.data$trono.krab.znfs$gene | grepl('ZNF', gene), 'ZNF', 'other')) %>%
ggplot(aes(log2FoldChange, -log10(padj), label = gene, color = `unique peaks`)) +
geom_point() +
scale_color_manual(values = c('grey', 'black'), name = 'unique ATAC peak') +
geom_text_repel(aes(label = ifelse(`unique peaks` == 'ZNF' | abs(log2FoldChange) > 4, gene, '')),
size = txt.mm_to_pts(0.8), show.legend = F, max.overlaps = 30, segment.color = NA) +
geom_vline(xintercept = c(0), alpha = c(0.3), linetype = 'dashed') +
xlab('RNA log2FoldChange') +
theme(legend.position = c(0.40, 0.99)) -> plt_6B.plt
salmon.quant$aale$intra$deseq$de %>%
merge(znf_ame_out, by.x = 'gene', by.y = 'TF') %>%
mutate(gene_set = ifelse(gene %in% ifn_gene_list,'IFN', 'other'),
gene_set = ifelse(gene %in% msig.df$HALLMARK_KRAS_SIGNALING_UP, 'KRAS', gene_set),
gene_set = ifelse(gene %in% meta.data$trono.krab.znfs$gene | grepl('^ZNF', gene), 'ZNF', gene_set)) %>%
mutate(gene_set = factor(gene_set, levels = c('KRAS','IFN','ZNF', 'other'))) %>%
ggplot(aes(log2FoldChange, -log10(padj), color = gene_set, label = gene)) +
geom_point() +
scale_color_viridis_d(name = 'TF classification') +
xlab('DE TF RNA log2FoldChange') +
xlim(-2.5,2.5)+
geom_text_repel(aes(label = ifelse(abs(log2FoldChange) > 0, gene, '')), color = 'black') +
geom_vline(xintercept = 0, alpha = 0.3, linetype = 'dashed') +
theme(legend.position = 'top') -> plt_6D.plt
fig_6_patch <- "
AB
CC
DD
"
wrap_plots(plt_6A.plt,
plt_6B.plt,
wrap_elements(full = znf_atac_6.plt),
plt_6D.plt,
# plt_6E.plt,
heights = c(0.75,1.5,0.75),
design = fig_6_patch) +
plot_annotation(tag_levels = 'A') -> figure_6_patchwork.fig
ggsave(plot = figure_6_patchwork.fig,
filename = file.path(figure.dir, 'fig.4', 'figure_4_patchwork.pdf'),
width = patch_fig_width, height = patch_fig_height, device = cairo_pdf, units = 'mm')
figure_6_patchwork.fig
Figure 5. Broad downregulation of KZNFs in mutant KRAS LUAD in vivo.
complex plotting
plot_complex_heatmap <- function(filter_values, show_genes = F, split_vector = NULL, split_title = NULL) {
set.seed(123)
## heatmap count matrix
working_toil_counts %>%
filter(gene %in% filter_values) %>%
group_by(gene) %>%
mutate(count = scale(log1p(count), center = T)) %>%
select(sample, gene, count) %>%
pivot_wider(names_from = gene, values_from = count) %>%
distinct() %>%
column_to_rownames('sample') %>%
t() -> toil_hm_counts
## heatmap mutation annotation
working_toil_counts %>%
select(sample, 'G12D' = KRAS) %>%
distinct() %>%
column_to_rownames('sample') %>%
ComplexHeatmap::columnAnnotation(df = . ,
col = list(G12D = c('WT_normal' = 'white',
'p.G12D' = viridis(3)[2])),
show_legend = F,
annotation_name_side = 'right',
annotation_name_gp = grid::gpar(fontsize = 8),
simple_anno_size = unit(2.5, "mm")) -> toil_sample_anno
## heatmap DE barpot annotation
working_toil_counts %>%
filter(gene %in% filter_values) %>%
select(gene) %>%
distinct() %>%
merge(salmon.quant$aale$intra$deseq$de %>% select(gene, log2FoldChange), by = 'gene') %>%
column_to_rownames('gene') -> toil_de
toil_de[order(match(rownames(toil_de), rownames(toil_hm_counts))), , drop =F] -> toil_de
toil_de_list <- setNames(toil_de$log2FoldChange, rownames(toil_de))
ComplexHeatmap::rowAnnotation(
`AALE RNA log2FC` = ComplexHeatmap::anno_barplot(x = toil_de_list,
bar_width = 1,
which = 'row',
gp = grid::gpar(col = "black", fill = viridis(3)[2]),
border = FALSE,
annotation_name_gp = grid::gpar(fontsize = 8),
width = unit(1.25, "cm"),
height = unit(0.75, "cm")),
show_annotation_name = TRUE,
annotation_name_gp = grid::gpar(fontsize = 8)) -> log2fc_ha
working_toil_counts %>%
filter(gene %in% filter_values) %>%
select(gene) %>%
distinct() %>%
mutate(uniq_peak = ifelse(gene %in% znf_unique_peaks$name, T, F)) %>%
column_to_rownames('gene') %>%
ComplexHeatmap::rowAnnotation(df =.,
col = list(uniq_peak = c('TRUE' = viridis(2)[1], 'FALSE' = 'white')),
show_legend = F, annotation_name_side = 'top',
annotation_name_gp = grid::gpar(fontsize = 8),
simple_anno_size = unit(2, "mm")) -> gene_motif_anno
ComplexHeatmap::Heatmap(toil_hm_counts,
name = 'TCGA\nz-score',
show_row_names = show_genes,
show_column_names = F,
row_split = split_vector,
row_title = split_title,
row_title_gp = grid::gpar(fontsize = 7, face = 'bold'),
row_names_side = 'right',
row_dend_width = unit(3, 'mm'),
left_annotation = gene_motif_anno,
right_annotation = log2fc_ha,
top_annotation = toil_sample_anno,
heatmap_legend_param =
list(labels_gp = grid::gpar(fontsize = 7),
legend_gp = grid::gpar(fontsize = 7),
legend_direction = 'vertical',
legend_height = unit(3.5, "cm"),
title_gp = grid::gpar(fontsize = 7, face = 'bold')),
row_names_gp = grid::gpar(fontsize = 7),
heatmap_width = patch_fig_width*0.875)
}
consensus_atac_boolean_anno %>%
filter(ctrl_R1.mLb.clN.bool == T) %>%
filter(`Gene Name` %in% filter(salmon.quant$aale$intra$deseq$de, padj < 0.05,
log2FoldChange < -1.5)$gene) %>%
filter(`Gene Name` %in% meta.data$trono.krab.znfs$gene | grepl('^ZNF', `Gene Name`)) %>%
separate(Annotation, sep = '\\(', into = c('annotation', NA)) %>%
filter(annotation == "promoter-TSS ") %>%
mutate(strand = '+') %>%
select(chr, start, end, strand, name = `Gene Name`) -> znf_unique_peaks
salmon.quant$aale$intra$deseq$de %>%
filter(padj < 0.05, log2FoldChange < -1, gene %in% meta.data$trono.krab.znfs$gene | grepl('ZNF', gene)) -> aale_znf
aale_znf_chm <- plot_complex_heatmap(aale_znf$gene, show_genes = T, split_vector = NULL, split_title = NULL)
working_toil_counts %>%
filter(gene %in% c(aale_znf$gene)) %>%
group_by(gene) %>%
mutate(count = scale(log1p(count), center = T)) %>%
select(sample, count, gene, KRAS) %>%
group_by(gene) %>%
summarize(w = wilcox.test(count ~ KRAS)$p.value) %>%
filter(w < 0.05) %>%
pull(gene) -> toil_sig_znf
patchwork
working_toil_counts %>%
filter(gene %in% toil_sig_znf) %>%
group_by(gene) %>%
mutate(count = scale(log1p(count), center = T)) %>%
rename('KRAS' = 'G12D') %>%
ggplot(aes(gene, count, color = G12D)) +
geom_boxplot(fill = NA, outlier.colour = NA) +
geom_point(alpha = 0.1, position = position_jitterdodge(jitter.width = 0.25)) +
ylab('z-score') + xlab('') +
scale_color_manual(values = c(viridis(3)[2], viridis(3)[1]), name = 'KRAS') +
theme(axis.text.x = element_text(angle = 40, hjust = 1),
legend.position = c(0.99, 0.25)) -> plt_7B.plt
znf.avg.counts.tidy.stratified <-
norm.counts.final %>%
filter(sample_type == 'Primary Tumor') %>%
filter(gene %in% toil_sig_znf) %>%
select(sample, count) %>%
group_by(sample) %>%
summarize(avg.count = mean(count)) %>%
mutate(gene.set = 'aale_znf_signal') %>%
group_by(gene.set) %>%
mutate(stratum = ifelse(avg.count >= quantile(avg.count, 0.66),
'top-third', ifelse(avg.count <= quantile(avg.count, 0.33),
'bottom-third', 'middle'))) %>%
filter(stratum != 'middle') %>%
spread(gene.set, stratum)
surv.obj <- Surv(time = filter(luad.phenotypes.merge %>% select(sample, OS.time, OS) %>% distinct() ,
sample %in% znf.avg.counts.tidy.stratified$sample)$OS.time,
event = filter(luad.phenotypes.merge %>% select(sample, OS.time, OS) %>% distinct(),
sample %in% znf.avg.counts.tidy.stratified$sample)$OS)
surv.fit <- survfit(as.formula(surv.obj ~ znf.avg.counts.tidy.stratified$aale_znf_signal),
data = znf.avg.counts.tidy.stratified)
ggsurvplot(surv.fit,
pval = T,
legend.title = "",
ggtheme = theme_set_fun() + theme(legend.position = c(0.45,0.95)),
pval.coord = c(0, 0.08),
palette = viridis(3)[c(2,1)]) -> surv.plt
model.plt <- magick::image_read_pdf(path = file.path(figure.dir, 'fig.5','aale_model_cropped.pdf'))
fig_7_patch <- "
AA
BB
CD
"
wrap_plots(patchwork::wrap_elements(full =
grid::grid.grabExpr(ComplexHeatmap::draw(aale_znf_chm,
heatmap_legend_side = 'right'),
width = patch_fig_width)),
plt_7B.plt,
wrap_elements(full = surv.plt$plot),
wrap_elements(full = magick::image_ggplot(model.plt)),
heights = c(1.5,0.75,0.75),
design = fig_7_patch) +
plot_annotation(tag_levels = 'A') -> figure_7_patchwork.fig
ggsave(plot = figure_7_patchwork.fig,
filename = file.path(figure.dir, 'fig.5', 'figure_5_patchwork.pdf'),
width = patch_fig_width, height = patch_fig_height, device = cairo_pdf, units = 'mm')
figure_7_patchwork.fig